Registered S3 method overwritten by 'data.table':
method from
print.data.table
df_lyrics
Error: object 'df_lyrics' not found
# Build one row per (key artist, track URL) from the raw audio-feature rows.
# `artist_all` lists every artist credited on the same track, joined with the
# literal separator ",|,"; `artist_key` is the first artist in that list.
df_audio_features <- df_audio_features_raw %>%
  group_by(track_name, external_urls_spotify) %>%
  mutate(artist_all = paste(artist_name, collapse = ",|,")) %>%
  ungroup() %>%
  # The pipe is escaped so that the separator ",|," is matched literally.
  # Unescaped, "|" is regex alternation and the pattern matches at the first
  # plain comma, mangling artist names that themselves contain a comma.
  mutate(artist_key = sub(",\\|,.*", "", artist_all)) %>%
  dplyr::select(artist_name, artist_all, artist_key, everything()) %>%
  # Keep the first row found for each key artist / track URL pair.
  distinct(artist_key, external_urls_spotify, .keep_all = TRUE) %>%
  as.data.frame()
Error in df_audio_features_raw %>% group_by(track_name, external_urls_spotify) %>% :
could not find function "%>%"
# cant_markets
# NOTE(review): "cant_markets" was fused onto the assignment target below in
# the exported file (giving `cant_marketsdf_charts`); it looks like a section
# heading. The join further down reads `df_charts`, so that name is restored.

# Summarise the raw chart rows per (Artist, Track_Name, URL):
#   semanas_sum   number of chart rows in the group (presumably one per week)
#   streams_*     sum / min / max of Streams, divided by 10^6
#   position_*    mean / min / max of Position
#   indicador     streams_sum * semanas_sum / position_avg
df_charts <- df_charts_raw %>%
  group_by(Artist, Track_Name, URL) %>%
  dplyr::summarise(
    semanas_sum = n(),
    streams_sum = sum(Streams, na.rm = TRUE) / 10^6,
    streams_min = min(Streams) / 10^6,
    streams_max = max(Streams) / 10^6,
    position_avg = mean(Position, na.rm = TRUE),
    position_min = min(Position),
    position_max = max(Position),
    # Return an ungrouped table (replaces the separate ungroup() step and
    # silences the "has grouped output" message).
    .groups = "drop"
  ) %>%
  mutate(indicador = as.numeric(streams_sum * semanas_sum / position_avg))
`summarise()` has grouped output by 'Artist', 'Track_Name'. You can override using the `.groups` argument.
# audio_features and charts ----
# Join the audio features onto the charts so that every chart entry carries
# the characteristics of its song.
# There should be 22993 complete rows.
# NOTE: `features_continuas` and `features_categoricas` must already exist
# when this runs; in this file they are assigned further down.
join_audio_charts <- df_audio_features %>%
  dplyr::select(
    "artist_name", "artist_all", "artist_key",
    "track_name", "external_urls_spotify", "album_name", "album_release_year",
    all_of(features_continuas), all_of(features_categoricas)
  ) %>%
  # right_join keeps every row of df_charts, whether or not features matched.
  right_join(
    df_charts,
    by = c(
      "track_name" = "Track_Name",
      "artist_key" = "Artist",
      "external_urls_spotify" = "URL"
    )
  )
# Some chart entries have no audio features; keep this in mind for the analysis.
library(mice)
# Display the missing-data pattern of the joined table (rotate.names enabled).
md.pattern(x = join_audio_charts, rotate.names = TRUE)
# Show the rows of `popularidad` whose `indicador` is missing.
popularidad[which(is.na(popularidad$indicador)), ]
# Aggregate all chart weeks per track ----

# Column sets used by the aggregation below.
features_continuas <- c(
  "acousticness", "danceability", "duration_ms", "energy",
  "instrumentalness", "liveness", "loudness", "speechiness",
  "tempo", "valence", "cant_markets"
)
features_categoricas <- c(
  "explicit", "key_name", "mode_name", "key_mode", "album_type"
)
groupping_cols <- c(
  "artist_name", "artist_all", "artist_key", "track_name",
  "external_urls_spotify", "album_name", "album_release_year"
)
numeric_col_charts <- c("Position", "Streams")
week_start <- c("week_start")

# Group once by the track-identifying columns and reuse the grouped table.
chart_group <- join_audio_charts %>%
  group_by(across(all_of(groupping_cols)))

# Continuous features: per-group mean, ignoring NA.
# `.groups = "drop_last"` is the default grouping of the result, made explicit.
continuas_summarized <- chart_group %>%
  summarise(
    across(all_of(features_continuas), ~ mean(.x, na.rm = TRUE)),
    .groups = "drop_last"
  )

# Categorical features: first value in each group.
categoricas_summarizes <- chart_group %>%
  summarise(
    across(all_of(features_categoricas), first),
    .groups = "drop_last"
  )

# `Position` / `Streams` are not present when the joined charts table has
# already been summarised per track. Report that explicitly instead of failing
# inside summarise(), and summarise only the columns that do exist.
if (!all(numeric_col_charts %in% names(join_audio_charts))) {
  warning(
    "Columns missing from join_audio_charts, skipped in the summary: ",
    paste(
      setdiff(numeric_col_charts, names(join_audio_charts)),
      collapse = ", "
    ),
    call. = FALSE
  )
}

# Chart columns: min / max / mean per group, named <column>_<min|max|avg>.
numeric_charts_summarizes <- chart_group %>%
  summarise(
    across(
      any_of(numeric_col_charts),
      list(min = min, max = max, avg = mean)
    ),
    .groups = "drop_last"
  )
Note: Using an external vector in selections is ambiguous.
i Use `all_of(numeric_col_charts)` instead of `numeric_col_charts` to silence this message.
i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
Error: Problem with `summarise()` input `..1`.
x Can't subset columns that don't exist.
x Columns `Position` and `Streams` don't exist.
i Input `..1` is `(function (.cols = everything(), .fns = NULL, ..., .names = NULL) ...`.
i The error occurred in group 1: artist_name = "*NSYNC", artist_all = "*NSYNC", artist_key = "*NSYNC", track_name = "Merry Christmas, Happy Holidays", external_urls_spotify = "https://open.spotify.com/track/15coTBAzEN1bOeipoNDZAR", album_name = "Home For Christmas", album_release_year = 1998.
Run `rlang::last_error()` to see where the error occurred.
# One row per distinct (artist_name, track_name, lyrics) combination.
df_lyrics_unicas <- distinct(df_lyrics, artist_name, track_name, lyrics)
nrow(df_lyrics_unicas)

# Attach lyrics to the chart/feature table by artist and track name, keeping
# every row of join_audio_charts at this step.
df_chart_w_lyrics <- merge(
  x = join_audio_charts,
  y = df_lyrics_unicas,
  by = c("artist_name", "track_name"),
  all.x = TRUE,
  all.y = FALSE
)

# Then drop the rows that ended up without lyrics.
df_chart_w_lyrics <- df_chart_w_lyrics[which(!is.na(df_chart_w_lyrics$lyrics)), ]
df_chart_w_lyrics[, "lyrics"]